
#include <Eigen/Core>
#include <bench/BenchTimer.h>
#include <iostream>

using namespace Eigen;
using namespace std;

#define END 9

template<int S>
struct map_size
{
	enum
	{
		ret = S
	};
};
template<>
struct map_size<10>
{
	enum
	{
		ret = 20
	};
};
template<>
struct map_size<11>
{
	enum
	{
		ret = 50
	};
};
template<>
struct map_size<12>
{
	enum
	{
		ret = 100
	};
};
template<>
struct map_size<13>
{
	enum
	{
		ret = 300
	};
};

template<int M, int N, int K>
struct alt_prod
{
	enum
	{
		ret = M == 1 && N == 1 ? InnerProduct
			  : K == 1		   ? OuterProduct
			  : M == 1		   ? GemvProduct
			  : N == 1		   ? GemvProduct
							   : GemmProduct
	};
};

void
print_mode(int mode)
{
	if (mode == InnerProduct)
		std::cout << "i";
	if (mode == OuterProduct)
		std::cout << "o";
	if (mode == CoeffBasedProductMode)
		std::cout << "c";
	if (mode == LazyCoeffBasedProductMode)
		std::cout << "l";
	if (mode == GemvProduct)
		std::cout << "v";
	if (mode == GemmProduct)
		std::cout << "m";
}

template<int Mode, typename Lhs, typename Rhs, typename Res>
EIGEN_DONT_INLINE void
prod(const Lhs& a, const Rhs& b, Res& c)
{
	c.noalias() += typename ProductReturnType<Lhs, Rhs, Mode>::Type(a, b);
}

template<int M, int N, int K, typename Scalar, int Mode>
EIGEN_DONT_INLINE void
bench_prod()
{
	typedef Matrix<Scalar, M, K> Lhs;
	Lhs a;
	a.setRandom();
	typedef Matrix<Scalar, K, N> Rhs;
	Rhs b;
	b.setRandom();
	typedef Matrix<Scalar, M, N> Res;
	Res c;
	c.setRandom();

	BenchTimer t;
	double n = 2. * double(M) * double(N) * double(K);
	int rep = 100000. / n;
	rep /= 2;
	if (rep < 1)
		rep = 1;
	do {
		rep *= 2;
		t.reset();
		BENCH(t, 1, rep, prod<CoeffBasedProductMode>(a, b, c));
	} while (t.best() < 0.1);

	t.reset();
	BENCH(t, 5, rep, prod<Mode>(a, b, c));

	print_mode(Mode);
	std::cout << int(1e-6 * n * rep / t.best()) << "\t";
}

template<int N>
struct print_n;
template<int M, int N, int K>
struct loop_on_m;
template<int M, int N, int K, typename Scalar, int Mode>
struct loop_on_n;

template<int M, int N, int K>
struct loop_on_k
{
	static void run()
	{
		std::cout << "K=" << K << "\t";
		print_n<N>::run();
		std::cout << "\n";

		loop_on_m<M, N, K>::run();
		std::cout << "\n\n";

		loop_on_k<M, N, K + 1>::run();
	}
};

template<int M, int N>
struct loop_on_k<M, N, END>
{
	static void run() {}
};

template<int M, int N, int K>
struct loop_on_m
{
	static void run()
	{
		std::cout << M << "f\t";
		loop_on_n<M, N, K, float, CoeffBasedProductMode>::run();
		std::cout << "\n";

		std::cout << M << "f\t";
		loop_on_n<M, N, K, float, -1>::run();
		std::cout << "\n";

		loop_on_m<M + 1, N, K>::run();
	}
};

template<int N, int K>
struct loop_on_m<END, N, K>
{
	static void run() {}
};

template<int M, int N, int K, typename Scalar, int Mode>
struct loop_on_n
{
	static void run()
	{
		bench_prod<M, N, K, Scalar, Mode == -1 ? alt_prod<M, N, K>::ret : Mode>();

		loop_on_n<M, N + 1, K, Scalar, Mode>::run();
	}
};

template<int M, int K, typename Scalar, int Mode>
struct loop_on_n<M, END, K, Scalar, Mode>
{
	static void run() {}
};

template<int N>
struct print_n
{
	static void run()
	{
		std::cout << map_size<N>::ret << "\t";
		print_n<N + 1>::run();
	}
};

template<>
struct print_n<END>
{
	static void run() {}
};

int
main()
{
	loop_on_k<1, 1, 1>::run();

	return 0;
}
